%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import plotly.express as px
# Load the training set; cells containing '?' or 'nan' are read as missing.
# NOTE(review): 'timestamp' is left as plain strings (dtype object). If a real
# time axis is needed, parse it with pd.to_datetime -- the values look
# day-first (e.g. 23-02-2019), but confirm against the raw file.
df = pd.read_csv(
    'Train.csv',
    na_values=['?', 'nan'],
)
df.head()
| | timestamp | Soil humidity 1 | Irrigation field 1 | Soil humidity 2 | Irrigation field 2 | Soil humidity 3 | Irrigation field 3 | Soil humidity 4 | Irrigation field 4 | Air temperature (C) | Air humidity (%) | Pressure (KPa) | Wind speed (Km/h) | Wind gust (Km/h) | Wind direction (Deg) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 23-02-2019 | 67.92 | 0.0 | 55.72 | 0.0 | -1.56 | 1.0 | 26.57 | 1.0 | 19.52 | 55.04 | 101.50 | 2.13 | 6.30 | 225.00 |
| 1 | 23-02-2019 | 67.89 | 0.0 | 55.74 | 0.0 | -1.51 | 1.0 | 26.58 | 1.0 | 19.49 | 55.17 | 101.50 | 2.01 | 10.46 | 123.75 |
| 2 | 23-02-2019 | 67.86 | 0.0 | 55.77 | 0.0 | -1.47 | 1.0 | 26.59 | 1.0 | 19.47 | 55.30 | 101.51 | 1.90 | 14.63 | 22.50 |
| 3 | 23-02-2019 | 67.84 | 0.0 | 55.79 | 0.0 | -1.42 | 1.0 | 26.61 | 1.0 | 19.54 | 54.20 | 101.51 | 2.28 | 16.08 | 123.75 |
| 4 | 23-02-2019 | 67.81 | 0.0 | 55.82 | 0.0 | -1.38 | 1.0 | 26.62 | 1.0 | 19.61 | 53.09 | 101.51 | 2.66 | 17.52 | 225.00 |
# Print each column's dtype and non-null count to see how much data is missing.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 28049 entries, 0 to 28048 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 timestamp 28049 non-null object 1 Soil humidity 1 8917 non-null float64 2 Irrigation field 1 10066 non-null float64 3 Soil humidity 2 26304 non-null float64 4 Irrigation field 2 28029 non-null float64 5 Soil humidity 3 16086 non-null float64 6 Irrigation field 3 17236 non-null float64 7 Soil humidity 4 26306 non-null float64 8 Irrigation field 4 28029 non-null float64 9 Air temperature (C) 23995 non-null float64 10 Air humidity (%) 23995 non-null float64 11 Pressure (KPa) 23995 non-null float64 12 Wind speed (Km/h) 23995 non-null float64 13 Wind gust (Km/h) 23995 non-null float64 14 Wind direction (Deg) 23995 non-null float64 dtypes: float64(14), object(1) memory usage: 3.2+ MB
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()
| | Soil humidity 1 | Irrigation field 1 | Soil humidity 2 | Irrigation field 2 | Soil humidity 3 | Irrigation field 3 | Soil humidity 4 | Irrigation field 4 | Air temperature (C) | Air humidity (%) | Pressure (KPa) | Wind speed (Km/h) | Wind gust (Km/h) | Wind direction (Deg) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 8917.000000 | 10066.000000 | 26304.000000 | 28029.000000 | 16086.000000 | 17236.000000 | 26306.000000 | 28029.000000 | 23995.000000 | 23995.000000 | 23995.000000 | 23995.000000 | 23995.000000 | 23995.000000 |
| mean | 63.025653 | 0.177727 | 18.767424 | 0.095615 | 25.046062 | 0.245243 | 17.512501 | 0.211531 | 24.263780 | 58.521052 | 101.131418 | 9.896898 | 41.744905 | 93.983796 |
| std | 9.801099 | 0.382302 | 22.903736 | 0.294068 | 19.998016 | 0.430243 | 14.247310 | 0.408401 | 6.756751 | 30.073448 | 0.218448 | 4.325666 | 24.168987 | 100.502500 |
| min | 36.000000 | 0.000000 | -45.000000 | 0.000000 | -38.000000 | 0.000000 | -30.000000 | 0.000000 | 11.220000 | 0.590000 | 100.500000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 58.210000 | 0.000000 | 3.850000 | 0.000000 | 10.220000 | 0.000000 | 8.000000 | 0.000000 | 18.580000 | 33.980000 | 100.980000 | 6.830000 | 21.820000 | 22.500000 |
| 50% | 63.000000 | 0.000000 | 23.000000 | 0.000000 | 26.610000 | 0.000000 | 19.600000 | 0.000000 | 22.150000 | 61.310000 | 101.120000 | 9.530000 | 37.240000 | 22.500000 |
| 75% | 68.000000 | 0.000000 | 35.310000 | 0.000000 | 41.360000 | 0.000000 | 29.050000 | 0.000000 | 29.590000 | 86.360000 | 101.260000 | 12.460000 | 59.275000 | 180.000000 |
| max | 88.000000 | 1.000000 | 59.000000 | 1.000000 | 68.000000 | 1.000000 | 47.000000 | 1.000000 | 45.560000 | 96.000000 | 101.860000 | 31.360000 | 133.330000 | 337.500000 |
# Count the missing values in every column.
df.isna().sum()
timestamp 0 Soil humidity 1 19132 Irrigation field 1 17983 Soil humidity 2 1745 Irrigation field 2 20 Soil humidity 3 11963 Irrigation field 3 10813 Soil humidity 4 1743 Irrigation field 4 20 Air temperature (C) 4054 Air humidity (%) 4054 Pressure (KPa) 4054 Wind speed (Km/h) 4054 Wind gust (Km/h) 4054 Wind direction (Deg) 4054 dtype: int64
# Keep the column labels as an array and print them for reference.
col_names=df.columns.values
print(col_names)
['timestamp' 'Soil humidity 1' 'Irrigation field 1' 'Soil humidity 2' 'Irrigation field 2' 'Soil humidity 3' 'Irrigation field 3' 'Soil humidity 4' 'Irrigation field 4' 'Air temperature (C)' 'Air humidity (%)' 'Pressure (KPa)' 'Wind speed (Km/h)' 'Wind gust (Km/h)' 'Wind direction (Deg)']
# Field 1: soil humidity against timestamp, coloured by air humidity, with the
# irrigation flag and the remaining weather readings shown on hover.
fig = px.scatter(
    df,
    x='timestamp',
    y='Soil humidity 1',
    color='Air humidity (%)',
    hover_data=[
        'Irrigation field 1',
        'Air temperature (C)',
        'Pressure (KPa)',
        'Wind speed (Km/h)',
        'Wind gust (Km/h)',
        'Wind direction (Deg)',
    ],
)
fig.show()

# Keep only the field-1 humidity, its irrigation flag and the weather columns.
sh1_df = df.loc[:, [
    'Soil humidity 1',
    'Irrigation field 1',
    'Air temperature (C)',
    'Air humidity (%)',
    'Pressure (KPa)',
    'Wind speed (Km/h)',
    'Wind gust (Km/h)',
    'Wind direction (Deg)',
]]
sh1_df.head()
| | Soil humidity 1 | Irrigation field 1 | Air temperature (C) | Air humidity (%) | Pressure (KPa) | Wind speed (Km/h) | Wind gust (Km/h) | Wind direction (Deg) |
|---|---|---|---|---|---|---|---|---|
| 0 | 67.92 | 0.0 | 19.52 | 55.04 | 101.50 | 2.13 | 6.30 | 225.00 |
| 1 | 67.89 | 0.0 | 19.49 | 55.17 | 101.50 | 2.01 | 10.46 | 123.75 |
| 2 | 67.86 | 0.0 | 19.47 | 55.30 | 101.51 | 1.90 | 14.63 | 22.50 |
| 3 | 67.84 | 0.0 | 19.54 | 54.20 | 101.51 | 2.28 | 16.08 | 123.75 |
| 4 | 67.81 | 0.0 | 19.61 | 53.09 | 101.51 | 2.66 | 17.52 | 225.00 |
# Pearson correlation between the field-1 columns, then rank every column by
# its correlation with field-1 soil humidity (largest value first).
corr_matrix = sh1_df.corr(method="pearson")
corr_matrix.loc[:, "Soil humidity 1"].sort_values(ascending=False)
Soil humidity 1 1.000000 Irrigation field 1 0.170783 Air temperature (C) 0.127492 Wind direction (Deg) 0.037806 Pressure (KPa) 0.015354 Wind gust (Km/h) -0.057814 Wind speed (Km/h) -0.095873 Air humidity (%) -0.322538 Name: Soil humidity 1, dtype: float64
# Field 2: soil humidity against timestamp, coloured by air humidity, with the
# irrigation flag and the remaining weather readings shown on hover.
fig = px.scatter(
    df,
    x='timestamp',
    y='Soil humidity 2',
    color='Air humidity (%)',
    hover_data=[
        'Irrigation field 2',
        'Air temperature (C)',
        'Pressure (KPa)',
        'Wind speed (Km/h)',
        'Wind gust (Km/h)',
        'Wind direction (Deg)',
    ],
)
fig.show()

# Keep only the field-2 humidity, its irrigation flag and the weather columns.
sh2_df = df.loc[:, [
    'Soil humidity 2',
    'Irrigation field 2',
    'Air temperature (C)',
    'Air humidity (%)',
    'Pressure (KPa)',
    'Wind speed (Km/h)',
    'Wind gust (Km/h)',
    'Wind direction (Deg)',
]]
sh2_df.head()
| | Soil humidity 2 | Irrigation field 2 | Air temperature (C) | Air humidity (%) | Pressure (KPa) | Wind speed (Km/h) | Wind gust (Km/h) | Wind direction (Deg) |
|---|---|---|---|---|---|---|---|---|
| 0 | 55.72 | 0.0 | 19.52 | 55.04 | 101.50 | 2.13 | 6.30 | 225.00 |
| 1 | 55.74 | 0.0 | 19.49 | 55.17 | 101.50 | 2.01 | 10.46 | 123.75 |
| 2 | 55.77 | 0.0 | 19.47 | 55.30 | 101.51 | 1.90 | 14.63 | 22.50 |
| 3 | 55.79 | 0.0 | 19.54 | 54.20 | 101.51 | 2.28 | 16.08 | 123.75 |
| 4 | 55.82 | 0.0 | 19.61 | 53.09 | 101.51 | 2.66 | 17.52 | 225.00 |
# Pearson correlation between the field-2 columns, then rank every column by
# its correlation with field-2 soil humidity (largest value first).
corr_matrix = sh2_df.corr(method="pearson")
corr_matrix.loc[:, "Soil humidity 2"].sort_values(ascending=False)
Soil humidity 2 1.000000 Pressure (KPa) 0.289770 Wind direction (Deg) 0.211908 Irrigation field 2 0.126485 Air temperature (C) -0.005512 Wind gust (Km/h) -0.114120 Wind speed (Km/h) -0.172694 Air humidity (%) -0.297771 Name: Soil humidity 2, dtype: float64
# Field 3: soil humidity against timestamp, coloured by air humidity, with the
# irrigation flag and the remaining weather readings shown on hover.
fig = px.scatter(
    df,
    x='timestamp',
    y='Soil humidity 3',
    color='Air humidity (%)',
    hover_data=[
        'Irrigation field 3',
        'Air temperature (C)',
        'Pressure (KPa)',
        'Wind speed (Km/h)',
        'Wind gust (Km/h)',
        'Wind direction (Deg)',
    ],
)
fig.show()

# Keep only the field-3 humidity, its irrigation flag and the weather columns.
sh3_df = df.loc[:, [
    'Soil humidity 3',
    'Irrigation field 3',
    'Air temperature (C)',
    'Air humidity (%)',
    'Pressure (KPa)',
    'Wind speed (Km/h)',
    'Wind gust (Km/h)',
    'Wind direction (Deg)',
]]
sh3_df.head()
| | Soil humidity 3 | Irrigation field 3 | Air temperature (C) | Air humidity (%) | Pressure (KPa) | Wind speed (Km/h) | Wind gust (Km/h) | Wind direction (Deg) |
|---|---|---|---|---|---|---|---|---|
| 0 | -1.56 | 1.0 | 19.52 | 55.04 | 101.50 | 2.13 | 6.30 | 225.00 |
| 1 | -1.51 | 1.0 | 19.49 | 55.17 | 101.50 | 2.01 | 10.46 | 123.75 |
| 2 | -1.47 | 1.0 | 19.47 | 55.30 | 101.51 | 1.90 | 14.63 | 22.50 |
| 3 | -1.42 | 1.0 | 19.54 | 54.20 | 101.51 | 2.28 | 16.08 | 123.75 |
| 4 | -1.38 | 1.0 | 19.61 | 53.09 | 101.51 | 2.66 | 17.52 | 225.00 |
# Pearson correlation between the field-3 columns, then rank every column by
# its correlation with field-3 soil humidity (largest value first).
corr_matrix = sh3_df.corr(method="pearson")
corr_matrix.loc[:, "Soil humidity 3"].sort_values(ascending=False)
Soil humidity 3 1.000000 Wind direction (Deg) 0.150181 Pressure (KPa) 0.078811 Air temperature (C) -0.015056 Wind gust (Km/h) -0.054601 Air humidity (%) -0.137482 Wind speed (Km/h) -0.161307 Irrigation field 3 -0.222228 Name: Soil humidity 3, dtype: float64
# Field 4: soil humidity against timestamp, coloured by air humidity, with the
# irrigation flag and the remaining weather readings shown on hover.
fig = px.scatter(
    df,
    x='timestamp',
    y='Soil humidity 4',
    color='Air humidity (%)',
    hover_data=[
        'Irrigation field 4',
        'Air temperature (C)',
        'Pressure (KPa)',
        'Wind speed (Km/h)',
        'Wind gust (Km/h)',
        'Wind direction (Deg)',
    ],
)
fig.show()

# Keep only the field-4 humidity, its irrigation flag and the weather columns.
sh4_df = df.loc[:, [
    'Soil humidity 4',
    'Irrigation field 4',
    'Air temperature (C)',
    'Air humidity (%)',
    'Pressure (KPa)',
    'Wind speed (Km/h)',
    'Wind gust (Km/h)',
    'Wind direction (Deg)',
]]
sh4_df.head()
| | Soil humidity 4 | Irrigation field 4 | Air temperature (C) | Air humidity (%) | Pressure (KPa) | Wind speed (Km/h) | Wind gust (Km/h) | Wind direction (Deg) |
|---|---|---|---|---|---|---|---|---|
| 0 | 26.57 | 1.0 | 19.52 | 55.04 | 101.50 | 2.13 | 6.30 | 225.00 |
| 1 | 26.58 | 1.0 | 19.49 | 55.17 | 101.50 | 2.01 | 10.46 | 123.75 |
| 2 | 26.59 | 1.0 | 19.47 | 55.30 | 101.51 | 1.90 | 14.63 | 22.50 |
| 3 | 26.61 | 1.0 | 19.54 | 54.20 | 101.51 | 2.28 | 16.08 | 123.75 |
| 4 | 26.62 | 1.0 | 19.61 | 53.09 | 101.51 | 2.66 | 17.52 | 225.00 |
# Pearson correlation between the field-4 columns, then rank every column by
# its correlation with field-4 soil humidity (largest value first).
corr_matrix = sh4_df.corr(method="pearson")
corr_matrix.loc[:, "Soil humidity 4"].sort_values(ascending=False)
Soil humidity 4 1.000000 Irrigation field 4 0.210986 Wind direction (Deg) 0.062345 Air humidity (%) -0.035806 Wind gust (Km/h) -0.049485 Pressure (KPa) -0.050020 Air temperature (C) -0.065449 Wind speed (Km/h) -0.082555 Name: Soil humidity 4, dtype: float64